#Install and load relevant packages
install.packages(c("mixlm","CBPS","mgcv","drtmle")) 
library(mixlm)
library(CBPS)
library(mgcv)
library(drtmle)

#Configure the sample size n, your beta value, and the number of repetitions, r.
n=300
beta <- 0
r=50

################## CORRECT OUTCOME MODEL, CORRECT PROPENSITY MODEL #################

#Initialize.
ATE_true <- rep(NA,r)
ATE_glm <- rep(NA,r)
ATE_cbps <- rep(NA,r)
ATE_ocbps <- rep(NA,r)
ATE_gam <- rep(NA,r)
ATE_dr <- rep(NA,r)

bias_true <- rep(NA,r)
bias_glm <- rep(NA,r)
bias_cbps <- rep(NA,r)
bias_ocbps <- rep(NA,r)
bias_gam <- rep(NA,r)
bias_dr <- rep(NA,r)

lower_true <- rep(NA,r)
upper_true <- rep(NA,r)
coverage_true <- rep(NA,r)

lower_glm <- rep(NA,r)
upper_glm <- rep(NA,r)
coverage_glm <- rep(NA,r)

lower_gam <- rep(NA,r)
upper_gam <- rep(NA,r)
coverage_gam <- rep(NA,r)

#drtmle package provides a function for confidence intervals, so we don't need to initialize the CI vectors.
coverage_dr <- rep(NA,r)

lower_cbps <- rep(NA,r)
upper_cbps <- rep(NA,r)
coverage_cbps <- rep(NA,r)

lower_ocbps <- rep(NA,r)
upper_ocbps <- rep(NA,r)
coverage_ocbps <- rep(NA,r)

#Generate seeds to use in each iteration for reproducible results.
set.seed(123456)

for(j in 1:r){
  
  #Initialize the X matrix.
  
  #Generate the covariates according to the simulation settings in the paper.
  X_1 <- rnorm(n,3,sqrt(2))
  X_2 <- rnorm(n,0,1)
  X_3 <- rnorm(n,0,1)
  X_4 <- rnorm(n,0,1)
  
  X_mat <- as.matrix(cbind(rep(1,n), X_1, X_2, X_3, X_4))
  
  #Initialize the Y_1 and Y_0 vector according to the simulation settings in the paper.
  Y_1 <- X_mat %*% matrix(c(200, 27.4, 13.7, 13.7, 13.7), 5, 1) + rnorm(n)
  Y_0 <- X_mat %*% matrix(c(200, 0 , 13.7, 13.7, 13.7), 5, 1) + rnorm(n)
  
  #True Propensity Score calculation.
  pre_prop <- X_mat[,2:5] %*% matrix(c(-beta, 0.5, -0.25, -0.1), 4, 1)
  propensity_true <- (exp(pre_prop))/(1+(exp(pre_prop)))
  
  #Generate T_vec, the binary vector of treatment assignments, with the true propensity scores.
  T_vec <- rbinom(n, size=1, prob=propensity_true)
  
  #Now generate the actual outcome, Y_outcome.
  Y_outcome <- Y_1*T_vec + Y_0*(1-T_vec)
  
  #First, fit the linear regression model for T=1 and T=0.
  Y_outcome_1 <- Y_outcome[which(T_vec==1)]
  Y_outcome_0 <- Y_outcome[which(T_vec==0)]
  
  X_mat_1 <- X_mat[which(T_vec==1),c(2,3,4,5)]
  X_mat_0 <- X_mat[which(T_vec==0),c(2,3,4,5)]
  
  lin_1 <- lm(Y_outcome_1 ~ X_mat_1) 
  lin_0 <- lm(Y_outcome_0 ~ X_mat_0)
  
  sigma_hat_1 <- summary(lin_1)$s #residual standard error for T=1
  sigma_hat_0 <- summary(lin_0)$s #residual standard error for T=0
  
  fitted_1 <- X_mat%*%as.matrix(lin_1$coefficients,5,1) #Y_hat(1|X_i) values 
  fitted_0 <- X_mat%*%as.matrix(lin_0$coefficients,5,1) #Y_hat(0|X_i) values
  
  # #The next 5 lines are an alternative, more general way to get an estimate of the error variances.
  # fitted_1_actual <- fitted_1[which(T_vec==1)] #The fitted Y(1) values for the actual treated group
  # fitted_0_actual <- fitted_0[which(T_vec==0)] #The fitted Y(0) values for the actual control group
  
  # resid_1 <- Y_outcome_1 - fitted_1_actual
  # resid_0 <- Y_outcome_0 - fitted_0_actual
  
  # #The next 4 lines are an alternative, more general way to get the estimate of Var(Y(1)|X) and Var(Y(0)|X). 
  # #NOTE: We are using the RSE (Residual Standard Error) for the estimation and thus we divide by n-p, not n-1.
  # #(The RSE is an unbiased estimator for the error variance.)
  # sqrt((sum((resid_1)^2))/(length(resid_1)-ncol(X_mat)))
  # sqrt((sum((resid_0)^2))/(length(resid_0)-ncol(X_mat)))
  
  L_hat <- fitted_1 - fitted_0
  K_hat <- fitted_0
  
  
  ####### TRUE IPTW #######
  #Calculating the ATE.
  ATE_true[j] <- mean((T_vec*Y_outcome/propensity_true) - (((1-T_vec)*Y_outcome)/(1-propensity_true)))
  
  #Calculate the bias:
  bias_true[j] <- ATE_true[j] - 82.2
  
  asy_var_true_hat <- mean((fitted_1)^2/propensity_true + (fitted_0)^2/(1-propensity_true) - (rep(ATE_true[j],n))^2)
  
  #Calculate the 95% CI.
  diff_true <- 1.96*sqrt(asy_var_true_hat/n)
  lower_true[j] <- ATE_true[j] - diff_true
  upper_true[j] <- ATE_true[j] + diff_true
  
  #Check if the true value of mu=82.2 is within this CI.
  if(82.2 >= lower_true[j] & 82.2 <= upper_true[j]){
    coverage_true[j] <- 1
  }else{
    coverage_true[j] <- 0
  }
  
  
  ####### GLM IPTW #######
  #Calculating the ATE.
  glm.fit <- glm(T_vec ~ X_1 + X_2 + X_3 + X_4, family="binomial", REML=FALSE)
  pre_prop_glm <- X_mat[,1:5] %*% matrix(coefficients(glm.fit), 5, 1)
  propensity_glm <- (exp(pre_prop_glm))/(1+(exp(pre_prop_glm)))
  ATE_glm[j] <- mean((T_vec*Y_outcome/propensity_glm) - (((1-T_vec)*Y_outcome)/(1-propensity_glm)))
  
  #Calculate the bias:
  bias_glm[j] <- ATE_glm[j] - 82.2
  
  #95% CI 
  #Fisher Information Matrix
  I <- array(rep(NA,5*5*n),dim=c(5,5,n))
  
  for(i in 1:n){
    exp_term <- exp(t(coefficients(glm.fit))%*%matrix(X_mat[i,],5,1))
    I[,,i] <- -matrix(rep(exp_term/((1+exp_term)^2),5*5),5,5)*(matrix(X_mat[i,],5,1)%*%matrix(X_mat[i,],1,5))
  }
  
  I_mat <- -apply(I,c(1,2),mean)
  
  #H_0_hat_glm
  prop_modified_glm <- matrix(propensity_glm,1,n)/(1+exp(matrix(coefficients(glm.fit),1,5)%*%t(X_mat)))
  der_mat_glm <- matrix(rep(prop_modified_glm,each=5),5,n)
  derivative_glm <- matrix(rep(NA,5*n),5,n)
  for(i in 1:n){
    derivative_glm[,i] <- matrix(der_mat_glm[,i]*X_mat[i,],5,1) 
  }
  
  temp_sum <- matrix(rep(0,5),5,1)
  for(i in 1:n){
    temp_sum <- temp_sum + derivative_glm[,i]*(K_hat[i] + (1-propensity_glm[i])*L_hat[i])/(propensity_glm[i]*(1-propensity_glm[i]))
  }
  H_0_hat_glm <- -temp_sum/n
  
  #Sigma_mu_glm
  Sigma_mu_glm <- mean((fitted_1)^2/propensity_glm + (fitted_0)^2/(1-propensity_glm)) - (ATE_glm[j])^2
  
  #Asymptotic Variance
  asy_var_glm_hat <- Sigma_mu_glm - t(H_0_hat_glm)%*%solve(I_mat)%*%H_0_hat_glm
  
  #Calculate the 95% CI.
  diff_glm <- 1.96*sqrt(asy_var_glm_hat/n)
  lower_glm[j] <- ATE_glm[j] - diff_glm
  upper_glm[j] <- ATE_glm[j] + diff_glm
  
  #Check if the true value of mu=82.2 is within this CI.
  if(82.2 >= lower_glm[j] & 82.2 <= upper_glm[j]){
    coverage_glm[j] <- 1
  }else{
    coverage_glm[j] <- 0
  }
  
  
  ####### CBPS Method #######
  #Calculating the ATE.
  cbps.fit <- CBPS(T_vec ~ X_mat, ATT=0, method="exact")
  propensity_cbps <- fitted.values(cbps.fit)
  ATE_cbps[j] <- mean((T_vec*Y_outcome/propensity_cbps) - (((1-T_vec)*Y_outcome)/(1-propensity_cbps)))
  
  #Calculate the bias:
  bias_cbps[j] <- ATE_cbps[j] - 82.2
  
  #Calculate the 95% CI
  #omega_hat (Look at the Var(g_beta) formula on page 7 in the paper! 
  #Because this is omega_hat according to page 35 right under (A.2).)
  new_X_2 <- array(rep(NA,5*5*n),dim=c(5,5,n))
  for(i in 1:n){
    new_X_2[,,i] <- matrix(X_mat[i,],5,1)%*%t(X_mat[i,])/(propensity_cbps[i]*(1-propensity_cbps[i]))
  }
  omega_hat <- apply(new_X_2,c(1,2),mean)
  
  #Sigma_mu_hat
  Sigma_mu_hat <- mean(((fitted_1)^2/propensity_cbps) + ((fitted_0)^2/(1-propensity_cbps))) - (ATE_cbps[j])^2
  
  #Cov(mu_beta, g_beta)
  L_hat <- fitted_1 - fitted_0
  K_hat <- fitted_0
  
  new_X_3 <- matrix(rep(NA,n*5),n,5)
  for(i in 1:n){
    new_X_3[i,] <- X_mat[i,]*(K_hat[i] + (1-propensity_cbps[i])*L_hat[i])/(propensity_cbps[i]*(1-propensity_cbps[i]))
  }
  
  cov_hat <- apply(new_X_3,2,mean)
  
  #H_0_hat
  prop_modified <- propensity_cbps/(1+exp(matrix(cbps.fit$coefficients,1,5)%*%t(X_mat)))
  der_mat <- matrix(rep(prop_modified,each=5),5,n)
  derivative <- matrix(rep(NA,5*n),5,n)
  for(i in 1:n){
    derivative[,i] <- matrix(der_mat[,i]*X_mat[i,],5,1) 
  }
  
  temp_sum <- matrix(rep(0,5),5,1)
  for(i in 1:n){
    temp_sum <- temp_sum + derivative[,i]*(K_hat[i] + (1-propensity_cbps[i])*L_hat[i])/(propensity_cbps[i]*(1-propensity_cbps[i]))
  }
  H_0_hat <- -temp_sum/n
  
  #H_f_hat
  temp_sum_2 <- matrix(rep(0,5*5),5,5)
  for(i in 1:n){
    temp_sum_2 <- temp_sum_2 + ( matrix(X_mat[i,],5,1)%*%matrix(derivative[,i],1,5)/(propensity_cbps[i]*(1-propensity_cbps[i]))  )
  }
  H_f_hat <- -temp_sum_2/n
  
  #Put it all together
  asy_var_cbps_hat <- Sigma_mu_hat + t(H_0_hat)%*%solve(t(H_f_hat)%*%solve(omega_hat)%*%H_f_hat)%*%H_0_hat -
    2*t(H_0_hat)%*%solve(H_f_hat)%*%cov_hat
  
  #Calculate the 95% CI.
  diff_cbps <- 1.96*sqrt(asy_var_cbps_hat/n)
  lower_cbps[j] <- ATE_cbps[j] - diff_cbps
  upper_cbps[j] <- ATE_cbps[j] + diff_cbps
  
  
  #Check if the true value of mu=82.2 is within this CI.
  if(82.2 >= lower_cbps[j] & 82.2 <= upper_cbps[j]){
    coverage_cbps[j] <- 1
  }else{
    coverage_cbps[j] <- 0
  }
  
  
  ####### oCBPS Method #######
  #Calculating the ATE.
  ocbps.fit <- CBPS(T_vec ~ X_mat, ATT=0, baseline.formula = ~X_mat[,c(1,3:5)], diff.formula = ~X_mat[,2])
  propensity_ocbps <- fitted.values(ocbps.fit)
  ATE_ocbps[j] <- mean((T_vec*Y_outcome/propensity_ocbps) - (((1-T_vec)*Y_outcome)/(1-propensity_ocbps)))
  
  #Calculate the bias:
  bias_ocbps[j] <- ATE_ocbps[j] - 82.2
  
  #Calculate the 95% CI.
  L_hat <- fitted_1 - fitted_0
  
  asy_var_ocbps_hat <- mean((sigma_hat_1^2)/propensity_ocbps + (sigma_hat_0^2)/(1-propensity_ocbps) + (L_hat - rep(ATE_ocbps[j],n))^2)
  
  diff_ocbps <- 1.96*sqrt(asy_var_ocbps_hat/n)
  lower_ocbps[j] <- ATE_ocbps[j] - diff_ocbps
  upper_ocbps[j] <- ATE_ocbps[j] + diff_ocbps
  
  
  #Check if the true value of mu=82.2 is within this CI.
  if(82.2 >= lower_ocbps[j] & 82.2 <= upper_ocbps[j]){
    coverage_ocbps[j] <- 1
  }else{
    coverage_ocbps[j] <- 0
  }
  
  
  
  ####### GAM IPTW #######
  #Calculating the ATE.
  gam.fit <- gam(T_vec ~ s(X_1) + s(X_2) + s(X_3) + s(X_4), family="binomial",method="REML")
  pre_prop_gam <- as.matrix(predict(gam.fit))
  propensity_gam <- (exp(pre_prop_gam))/(1+(exp(pre_prop_gam)))
  ATE_gam[j] <- mean((T_vec*Y_outcome/propensity_gam) - (((1-T_vec)*Y_outcome)/(1-propensity_gam)))
  if(is.nan(ATE_gam[j])==TRUE){
    break
  }
  
  #Calculate the bias:
  bias_gam[j] <- ATE_gam[j] - 82.2
  
  asy_var_gam_hat <- mean((sigma_hat_1^2)/propensity_gam + (sigma_hat_0^2)/(1-propensity_gam) + (L_hat - rep(ATE_gam[j],n))^2)
  
  #Calculate the 95% CI.
  diff_gam <- 1.96*sqrt(asy_var_gam_hat/n) 
  lower_gam[j] <- ATE_gam[j] - diff_gam
  upper_gam[j] <- ATE_gam[j] + diff_gam
  
  
  #Check if the true value of mu=82.2 is within this CI.
  if(82.2 >= lower_gam[j] & 82.2 <= upper_gam[j]){
    coverage_gam[j] <- 1
  }else{
    coverage_gam[j] <- 0
  }
  
  
  ####### DR #######
  #Calculating the ATE.
  npreg_fit <- drtmle(W = as.data.frame(cbind(X_1,X_2,X_3,X_4)),
            A = T_vec,
            a_0 = c(0,1),
            Y = Y_outcome, family = binomial(),
            SL_g = "SL.npreg", 
            SL_Q = "SL.npreg",
            SL_gr = "SL.npreg", 
            SL_Qr = "SL.npreg",
            stratify = TRUE, maxIter=1)
    
  ATE_dr[j] <- npreg_fit$drtmle$est[2] - npreg_fit$drtmle$est[1]
  
  #Calculate the bias:
  bias_dr[j] <- ATE_dr[j] - 82.2
   
  #Calculate the 95% CI.
  ci_dr <- ci(npreg_fit,contrast=c(-1,1)) 
  
  #Check if the true value of mu=82.2 is within this CI.
  if(82.2 >= ci_dr$drtmle[1,2] & 82.2 <= ci_dr$drtmle[1,3]){
    coverage_dr[j] <- 1
  }else{
    coverage_dr[j] <- 0
  }

  print(j)
}

#True IPTW results
Bias_true <- mean(bias_true)
SD_true <- sd(ATE_true)
RMSE_true <- sqrt((sd(ATE_true))^2 + (mean(bias_true))^2)
coverage_prob_true <- mean(coverage_true)

#GLM IPTW results
Bias_glm <- mean(bias_glm)
SD_glm <- sd(ATE_glm)
RMSE_glm <- sqrt((sd(ATE_glm))^2 + (mean(bias_glm))^2)
coverage_prob_glm <- mean(coverage_glm)

#CBPS results
Bias_cbps <- mean(bias_cbps)
SD_cbps <- sd(ATE_cbps)
RMSE_cbps <- sqrt((sd(ATE_cbps))^2 + (mean(bias_cbps))^2)
coverage_prob_cbps <- mean(coverage_cbps)

#oCBPS results
Bias_ocbps <- mean(bias_ocbps)
SD_ocbps <- sd(ATE_ocbps)
RMSE_ocbps <- sqrt((sd(ATE_ocbps))^2 + (mean(bias_ocbps))^2)
coverage_prob_ocbps <- mean(coverage_ocbps)

#GAM results
Bias_gam <- mean(bias_gam)
SD_gam <- sd(ATE_gam)
RMSE_gam <- sqrt((sd(ATE_gam))^2 + (mean(bias_gam))^2)
coverage_prob_gam <- mean(coverage_gam)

#DR results
Bias_dr <- mean(bias_dr)
SD_dr <- sd(ATE_dr)
RMSE_dr <- sqrt((sd(ATE_dr))^2 + (mean(bias_dr))^2)
coverage_prob_dr <- mean(coverage_dr)


temp <- matrix(c(round(c(Bias_true, SD_true, RMSE_true),2), 
                 round(c(Bias_glm, SD_glm, RMSE_glm),2),
                 round(c(Bias_gam, SD_gam, RMSE_gam),2),
                 round(c(Bias_dr, SD_dr, RMSE_dr),2),
                 round(c(Bias_cbps, SD_cbps, RMSE_cbps),2),
                 round(c(Bias_ocbps, SD_ocbps, RMSE_ocbps),2)),
               nrow=6,ncol=3,byrow=T)

coverage <- c(coverage_prob_true, coverage_prob_glm, coverage_prob_gam, coverage_prob_dr, coverage_prob_cbps, coverage_prob_ocbps)
results_truetrue <- matrix(temp,18,1)
results_truetrue <- c(results_truetrue,coverage)

TT_results <- matrix(results_truetrue,24,1)

rownames(TT_results)= c("BIAS True", "BIAS GLM", "BIAS GAM", "BIAS DR", "BIAS CBPS", "BIAS oCBPS", 
                               "SD True", "SD GLM", "SD GAM", "SD DR", "SD CBPS", "SD oCBPS",
                               "RMSE True", "RMSE GLM", "RMSE GAM", "RMSE DR", "RMSE CBPS", "RMSE oCBPS",
                               "Coverage Prob. True", "Coverage Prob. GLM", "Coverage Prob. GAM", "Coverage Prob. DR",
                               "Coverage Prob. CBPS", "Coverage Prob. oCBPS")

colnames(TT_results) = "Correct Outcome Model, Correct Propensity Score Model"

#Final results in table form with relevant labels
TT_results